import bs4
import requests
from bs4 import BeautifulSoup
from openpyxl import workbook

def getHtml(url, timeout=10):
    """Download ``url`` and return the response body decoded as UTF-8.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on a stalled
            connection.

    Returns:
        The response body as text.

    Raises:
        requests.exceptions.HTTPError: If the server answers with a 4xx/5xx
            status code.
        requests.exceptions.RequestException: On connection errors or when
            the timeout expires.
    """
    # Send a desktop-browser user agent instead of the default requests one.
    header = {
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36"
    }
    res = requests.get(url, headers=header, timeout=timeout)
    # Fail loudly on an error status rather than handing an error page to
    # the HTML parser.
    res.raise_for_status()
    # Force UTF-8 decoding regardless of what the response headers declare.
    res.encoding = "UTF-8"
    return res.text

def finllUnivlst(ulist, html):
    """Append one row per table entry found in ``html`` to ``ulist``.

    The first ``<tbody>`` element of the document is located and each of its
    direct child tags is treated as a table row. For every row, the stripped
    text of its first five ``<td>`` cells is appended to ``ulist`` as a list
    of five strings.

    Args:
        ulist: List that receives the extracted rows; it is modified in
            place.
        html: HTML source of the page to parse.

    Returns:
        The same ``ulist`` object. It is returned unchanged when the document
        contains no ``<tbody>``.
    """
    soup = BeautifulSoup(html, "html.parser")
    tbody = soup.find("tbody")
    if tbody is None:
        # No table body in the document (e.g. an error page or a page whose
        # table is rendered client-side): there is nothing to extract.
        return ulist
    for tr in tbody.children:
        # ``children`` also yields whitespace/text nodes; only tags are rows.
        if not isinstance(tr, bs4.element.Tag):
            continue
        tds = tr('td')
        if len(tds) < 5:
            # Skip rows that do not carry the five expected cells instead of
            # failing with IndexError.
            continue
        ulist.append([td.text.strip() for td in tds[:5]])
    return ulist

"""
将提取出来的数据写入Excel表中或则写入数据库中
"""
def save_UnivList(ulist, num, fn=r"D:\桌面\作业下载文件\Chinauniversityrank.xlsx"):
    """Write a header row and the first ``num`` rows of ``ulist`` to an Excel file.

    A new workbook is created, its first worksheet is renamed, a fixed header
    row is written, and then up to ``num`` rows from ``ulist`` are appended
    before the workbook is saved to ``fn``.

    Args:
        ulist: Sequence of rows; each row is an iterable of cell values.
        num: Maximum number of rows to write. When ``ulist`` holds fewer
            rows, all of them are written; a value of zero or less writes
            only the header.
        fn: Destination path of the ``.xlsx`` file. Defaults to the path
            that was previously hard-coded in this function.
    """
    wb = workbook.Workbook()
    ws = wb.worksheets[0]
    ws.title = "2022中国大学生排名"
    ws.append(["2022排名", "学校名称", "省份", "类型", "分数"])
    # Slicing clamps to the available rows, so asking for more rows than
    # were scraped no longer raises IndexError before the file is saved.
    # ``max`` keeps a negative ``num`` meaning "no rows" rather than
    # "all but the last |num| rows".
    for row in ulist[:max(num, 0)]:
        ws.append(row)
    wb.save(fn)

if __name__ == '__main__':
    # Script entry point: download the ranking page, extract the table rows
    # and store the first 30 of them in the Excel workbook.
    ranking_url = "https://www.shanghairanking.cn/rankings/bcur/2021"
    rows = finllUnivlst([], getHtml(ranking_url))
    save_UnivList(rows, 30)